
************
* ANALYSIS *
************

* Labussiere and Vink (2020) 
* Replication Data for: The intergenerational impact of naturalisation reforms: 
* the citizenship status of children of immigrants in the Netherlands, 1995-2016
* Last modification: 22/01/2020

* This do-file describes the commands used to produce 
* 	- Table 1, Figures 4, 5, 7a, 7b (manuscript)
*	- Tables A4, A5, A6, A7, A8, Figures A1, A2, A3 (appendix)
*	- Annex A3 (appendix) - Figures 1 and 2
* It requires the package coefplot:
* https://ideas.repec.org/c/boc/bocode/s457686.html
* STATA version: 14

* IN: 6_final_addvar.dta (available within CBS secured environment upon request)
* See our Synthesis "Data construction" for information about this dataset (p.11)

* Identifying variables:
* RINPERSOON___ Personal individual identifier  (9-digit)
* YEAR_________ Year of observation (4-digit)
* NEW_IND______ Dummy = 1 at the respondent's first year of observation
* Other variables are described in the codebook.


use 6_final_addvar.dta, clear


	******************************************
	* DATA PREPARATION FOR SURVIVAL ANALYSIS *
	******************************************

* we exclude those who are Dutch from birth
drop if sum_nat_t5 == .b

* we exclude the years post-naturalisation
drop if eli_child == .z

* SNAPSPAN
* We add in varlist variables that are valid for the intervals (event variables)
snapspan rinpersoon lft ind_nat_t5 eli_child, gen(t0) replace

* STSET
stset lft, id(rinpersoon) failure(ind_nat_t5==1) origin(eli_child == 1)


	***********************
	* KAPLAN-MEIER CURVES *
	***********************

************
* FIGURE 4 *
************
* Kaplan-Meier failure function, by eligibility cohort
* We use cohort of eligibility 6 (three clusters):
sts graph, by(period_eli_child6) failure ci ///
xtitle("Years after eligibility", height(5)) ytitle("Proportion", height(6)) ///
plot1opts(lcolor("221 170 51")) ci1opts(color("238 238 187")) ///
plot2opts(lcolor("187 85 102")) ci2opts(color("221 170 179")) ///
plot3opts(lcolor("0 68 136")) ci3opts(color("128 162 196")) ///
plotopts(lwidth(medthick)) scale(1) xtick(0(1)20) title("") ///
legend(size(small)) graphregion(color(white)) ///


************
* FIGURE A3 *
************
* Kaplan-Meier failure function, all eligibility cohorts confounded
sts graph, failure ///
xtitle("Years after eligibility", height(5)) ytitle("Proportion", height(6)) ///
 plotopts(lwidth(medthick) lcolor("0 68 136")) ///
scale(1.1) xtick(0(1)25) title("") ///
graphregion(color(white))


	**************
	* COX MODELS * 
	**************

************************
* TABLE 1 and FIGURE 5 *
************************
* Main Cox model with :
* - mother's country of origin as stratum
* - interaction with the logarithm of time 
xi: stcox i.period_eli_child6 EU_child bothp_eli i.secm_ma_agg2 i.secm_pa_agg2, ///
											tvc(i.period_eli_child6) ///
											texp(ln(_t)) strata(country_of_origin)	///
										`	noshow nohr	
* Note: Figure 5 has been manually created with Excel using the coefficients attached
* to the eligibility cohort categories. The excel file is available upon request.
										
*****************
* FIGURES A1-A2 *
*****************
* Schoenfeld residuals for the model without time interaction
xi: stcox i.period_eli_child6 EU_child bothp_eli i.secm_ma_agg2 i.secm_pa_agg2 , ///
 noshow strata(country_of_origin)
estat phtest, detail
predict sch*, scaledsch

* Figure A1
scatter sch1 _t, msize(small) || lowess sch1 _t , graphregion(color(white)) ///
legend(label(1 "Schoenfeld residuals") label(2 "Fitted values")) ///
xtitle("Analysis time when records ends") title("")

* Figure A2
scatter sch2 _t, msize(small) || lowess sch2 _t, graphregion(color(white)) ///
legend(label(1 "Schoenfeld residuals") label(2 "Fitted values")) ///
xtitle("Analysis time when records ends") title("")

	
*********************
* FIGURE 1 ANNEX A3 *
*********************
* Robustness checks with alternative specifications  	
* Model A
xi: stcox i.period_eli_child6 EU_child bothp_eli i.secm_ma_agg2 i.secm_pa_agg2 
estimates store Model_A	

* Model B: with strata(country_of_origin) 
xi: stcox i.period_eli_child6 EU_child bothp_eli i.secm_ma_agg2 i.secm_pa_agg2 , ///
											noshow strata(country_of_origin)
estimates store Model_B

* Model C: with clustered standard errors vce(cluster country_of_origin) 										
xi: stcox i.period_eli_child6 EU_child bothp_eli i.secm_ma_agg2 i.secm_pa_agg2, ///
											noshow vce(cluster country_of_origin)
estimates store Model_C

* Model D: with a different method for handling ties:  Efron strata 
xi: stcox i.period_eli_child6 EU_child bothp_eli i.secm_ma_agg2 i.secm_pa_agg2, ///
											noshow strata(country_of_origin) efron 
estimates store Model_D
	

* labels 
label variable _Isecm_ma_a_2 "Mother on benefits"
label variable _Isecm_ma_a_3 "Mother has no income"
label variable _Isecm_ma_a_4 "Mother unknown SES"
label variable _Isecm_pa_a_2 "Father on benefits"
label variable _Isecm_pa_a_3 "Father has no income"
label variable _Isecm_pa_a_4 "Father unknown SES"
label variable _Iperiod_el_2 "Eligibility 1998-2002"
label variable _Iperiod_el_3 "Eligibility 2003 and after"
label variable EU_child "EU country"
label variable bothp_eli "Both parents are eligible"

* Coefplot
coefplot (Model_A, label(Model A) mcolor("221 170 51") ciopts(lcolor("221 170 51"))) ///
 (Model_B, label(Model B: A with stratification) ///
	mcolor("187 85 102") ciopts(lcolor("187 85 102"))) ///
 (Model_C, label(Model C: A with clustered standard errors) ///
	mcolor("0 68 136") ciopts(lcolor("0 68 136"))) ///
 (Model_D, label(Model D: B with Efron method for ties) ///
	mcolor("0 0 0") ciopts(lcolor("0 0 0"))), ///
 coeflabels(EU_child="{bf: EU country}" bothp_eli="{bf: Both parents are eligible}", notick) ///
 headings(_Isecm_ma_a_2="{bf:Mother SES} {it:(ref: employed)}" ///
	_Isecm_pa_a_2="{bf:Father SES} {it:(ref: employed)}" ///
	_Iperiod_el_2="{bf:Eligibility cohort} {it:(ref: 1995-1997)}" , gap(0.8)) ///
 drop(_cons) eform xline(1, lpattern(dash) lwidth(thin) lcolor(black)) msymbol(d)  ///
 levels(95) legend(size(small) cols(1) rows(4) region(lcolor(white)))  ///
 omitted baselevels graphregion(color(white)) xtitle("Hazard ratio", height(6))  ///
 order(_Iperiod_el_2 _Iperiod_el_3 . EU_child . bothp_eli ///
 _Isecm_pa_a_2 _Isecm_pa_a_3 _Isecm_pa_a_4 _Isecm_ma_a_2 _Isecm_ma_a_3 _Isecm_ma_a_4)

	
*****************
* TABLES  A4-A6 *
*****************
* Additional robustness checks

* Table A4
* With two-year clusters of eligibility cohorts
xi: stcox i.period_eli_child3 EU_child i.secm_ma_agg2 i.secm_pa_agg2 bothp_eli, ///
											strata(country_of_origin) noshow
											
* Table A5 
* With year of eligibility
gen year_eli_child_ub2008 = year_eli_child
replace year_eli_child_ub2008 = 2008 if year_eli_child > 2008
xi: stcox i.year_eli_child_ub2008 EU_child i.secm_ma_agg2 i.secm_pa_agg2 bothp_eli, ///
noshow strata(country_of_origin)


* Table A6
* With father's country of origin
xi: stcox i.period_eli_child6 EU_child i.secm_ma_agg2 i.secm_pa_agg2 bothp_eli, ///
											tvc(i.period_eli_child6) ///
											texp(ln(_t)) strata(country_of_origin_bis)	///
											noshow
	
	
	*************************************
	* CAUSE-SPECIFIC HAZARDS COX MODELS * 
	*************************************
	
use 6_final_addvar.dta, clear

* Data preparation

* we exclude those who are Dutch from birth
drop if sum_nat_t5 == .b

* we exclude the years post-naturalisation
drop if eli_child == .z

* SNAPSPAN
* We add in varlist variables that are valid for the intervals (event variables)
* We replace ind_nat_t5 by ind_nat_CR
snapspan rinpersoon lft ind_nat_CR eli_child, gen(t0) replace


****************
* TABLES A7-A8 *
****************
* Cause-specific hazard models with time-varying coefficients

* STSET IND_NAT_CR = 1 (naturalisation with both parents)
stset  lft, id(rinpersoon) failure(ind_nat_CR==1) origin(eli_child == 1)

* Table A7 
xi: stcox i.period_eli_child6 EU_child i.secm_ma_agg2 i.secm_pa_agg2, ///
tvc(i.period_eli_child6) texp(ln(_t)) noshow strata(country_of_origin)
estimates store Model_1

* STSET IND_NAT_CR = 2	(naturalisation with only one parent)										
stset  lft, id(rinpersoon) failure(ind_nat_CR==2) origin(eli_child == 1)

* Table A8
xi: stcox i.period_eli_child6 EU_child i.secm_ma_agg2 i.secm_pa_agg2, ///
tvc(i.period_eli_child6) texp(ln(_t)) noshow strata(country_of_origin)
estimates store Model_2
	
	
************			
* FIGURE 6 *
************
* Coeffplot comparing the coefficients of the cause-specific hazard models
* labels
label variable _Isecm_ma_a_2 "Mother on benefits"
label variable _Isecm_ma_a_3 "Mother has no income"
label variable _Isecm_ma_a_4 "Mother unknown  SES"
label variable _Isecm_pa_a_2 "Father on benefits"
label variable _Isecm_pa_a_3 "Father has no income"
label variable _Isecm_pa_a_4 "Father unknown SES"
label variable _Iperiod_el_2 "Eligibility 1998-2002"
label variable _Iperiod_el_3 "Eligibility 2003 and after"
label variable EU_child "EU country"

* coefplot
coefplot ///
 (Model_1, label(Naturalisation with both parents) ///
							mcolor("187 85 102") ciopts(lcolor("187 85 102"))) ///
 (Model_2, label(Naturalisation with one parent) ///
							mcolor("0 68 136") ciopts(lcolor("0 68 136"))), ///
drop(_cons) eform xline(1, lpattern(dash) lwidth(thin) lcolor(black)) msymbol(d) ///
levels(95) omitted baselevels legend(size(medsmall) cols(1) region(lcolor(white))) /// 
graphregion(color(white)) xtitle("Hazard ratio", height(6)) ///
coeflabels(EU_child="{bf: EU country}", notick) ///
headings(_Isecm_ma_a_2="{bf:Mother SES} {it:(ref: employed)}" ///
			_Isecm_pa_a_2="{bf:Father SES} {it:(ref: employed)}" ///
			_Iperiod_el_2="{bf:Eligibility cohort} {it:(ref: 1995-1997)}", gap(0.8)) ///
order(_Iperiod_el_2 _Iperiod_el_3 . EU_child  ///
	_Isecm_pa_a_2 _Isecm_pa_a_3 _Isecm_pa_a_4  ///
	_Isecm_ma_a_2 _Isecm_ma_a_3 _Isecm_ma_a_4)

*********************
* FIGURES 7a and 7b *
*********************
* Note: Figures 7a and 7b have been manually created with Excel using the 
* coefficients attached * to the eligibility cohort categories in tables A7 and A8.
* The excel file is available upon request.
			

*********************
* FIGURE 2 ANNEX A3 *
*********************
* Robustness checks for the cause-specific hazard models

* STSET IND_NAT_CR = 1  *  (naturalisation with both parents)
*************************
stset  lft, id(rinpersoon) failure(ind_nat_CR==1) origin(eli_child == 1)

* model A
xi: stcox i.period_eli_child6 EU_child i.secm_ma_agg2 i.secm_pa_agg2
estimates store Model_A1

* model B
xi: stcox i.period_eli_child6 EU_child i.secm_ma_agg2 i.secm_pa_agg2, ///
strata(country_of_origin)
estimates store Model_B1

* model C
xi: stcox i.period_eli_child6 EU_child i.secm_ma_agg2 i.secm_pa_agg2, ///
vce(cluster country_of_origin)
estimates store Model_C1

* model D
xi: stcox i.period_eli_child6 EU_child i.secm_ma_agg2 i.secm_pa_agg2, ///
strata(country_of_origin) efron
estimates store Model_D1

* STSET IND_NAT_CR = 2 *  (naturalisation with only one parent)
************************									
stset  lft, id(rinpersoon) failure(ind_nat_CR==2) origin(eli_child == 1)

* model A
xi: stcox i.period_eli_child6 EU_child i.secm_ma_agg2 i.secm_pa_agg2
estimates store Model_A2

* model B
xi: stcox i.period_eli_child6 EU_child i.secm_ma_agg2 i.secm_pa_agg2, ///
strata(country_of_origin)
estimates store Model_B2

* model C
xi: stcox i.period_eli_child6 EU_child i.secm_ma_agg2 i.secm_pa_agg2, ///
vce(cluster country_of_origin)
estimates store Model_C2

* model D
xi: stcox i.period_eli_child6 EU_child i.secm_ma_agg2 i.secm_pa_agg2, ///
strata(country_of_origin) efron
estimates store Model_D2
											
* labels
label variable _Isecm_ma_a_2 "Mother on benefits"
label variable _Isecm_ma_a_3 "Mother has no income"
label variable _Isecm_ma_a_4 "Mother unknown  SES"
label variable _Isecm_pa_a_2 "Father on benefits"
label variable _Isecm_pa_a_3 "Father has no income"
label variable _Isecm_pa_a_4 "Father unknown SES"
label variable _Iperiod_el_2 "Eligibility 1998-2002"
label variable _Iperiod_el_3 "Eligibility 2003 and after"
label variable EU_child "EU country"

* Coefficient plot
coefplot ///
(Model_A1, label(Model A) mcolor("221 170 51") ciopts(lcolor("221 170 51"))) ///
 (Model_B1, label(Model B: A with stratification) mcolor("187 85 102") ciopts(lcolor("187 85 102"))) ///
 (Model_C1, label(Model C: A with clustered standard errors) mcolor("0 68 136") ciopts(lcolor("0 68 136"))) ///
 (Model_D1, label(Model D: B with Efron method for ties) mcolor("0 0 0") ciopts(lcolor("0 0 0"))), ///
  bylabel(Event with both parents) || ///
(Model_A2, label(Model A) mcolor("221 170 51") ciopts(lcolor("221 170 51"))) ///
 (Model_B2, label(Model B: A with stratification) mcolor("187 85 102") ciopts(lcolor("187 85 102"))) ///
 (Model_C2, label(Model C: A with clustered standard errors) mcolor("0 68 136") ciopts(lcolor("0 68 136"))) ///
 (Model_D2, label(Model D: B with Efron method for ties) mcolor("0 0 0") ciopts(lcolor("0 0 0"))), ///
 bylabel(Event with one parent) ||, /// 
 coeflabels(EU_child="{bf: EU country}", notick) ///
 headings(_Isecm_ma_a_2="{bf:Mother SES} {it:(ref: employed)}" ///
		_Isecm_pa_a_2="{bf:Father SES} {it:(ref: employed)}" ///
		_Iperiod_el_2="{bf:Eligibility cohort} {it:(ref: 1995-1997)}" , gap(0.8)) ///
drop(_cons) eform xline(1, lpattern(dash) lwidth(thin) lcolor(black)) msymbol(d) levels(95)  ///
legend(size(small) cols(1) rows(4) position(3) region(lcolor(white))) ///
omitted baselevels graphregion(color(white)) xtitle("Hazard ratio", height(6))  ///
order(_Iperiod_el_2 _Iperiod_el_3 . EU_child  ///
		_Isecm_pa_a_2 _Isecm_pa_a_3 _Isecm_pa_a_4 ///
		_Isecm_ma_a_2 _Isecm_ma_a_3 _Isecm_ma_a_4)
	
	
	
	
	
	